Chapter 4 MAG catalogue

load("data/data.Rdata")

4.1 Genome phylogeny

# Build the phylum annotation table for the heatmap ring:
# a single `phylum` factor column with genomes as row names,
# rows sorted to follow the order of the tree tip labels
phylum_heatmap <- right_join(
    read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv"),
    genome_metadata,
    by = join_by(phylum)
  ) %>%
  select(genome, phylum) %>%
  arrange(match(genome, genome_tree$tip.label)) %>%
  # Factor levels follow first appearance in tip order
  mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
  column_to_rownames(var = "genome")

# Generate basal tree
# The tree is extended to ultrametric only for the sake of visualisation
circular_tree <- ggtree(
  force.ultrametric(genome_tree, method = "extend"),
  layout = "fan",
  open.angle = 10,
  size = 0.5
)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Add phylum ring
# gheatmap() attaches phylum_heatmap (one row per genome) as a ring around the
# tree; tiles are coloured through the manual fill scale.
# NOTE(review): phylum_colors is not defined in this section -- it is assumed
# to be available already (e.g. from the loaded data); confirm.
circular_tree <- gheatmap(
    circular_tree,
    phylum_heatmap,
    offset = 0.55,
    width = 0.1,
    colnames = FALSE
  ) +
  scale_fill_manual(values = phylum_colors) +
  geom_tiplab2(size = 1, hjust = -0.1) +
  theme(
    legend.position = "none",
    plot.margin = margin(0, 0, 0, 0),
    # `panel.margin` is deprecated since ggplot2 2.2.0; `panel.spacing` is its
    # replacement and avoids the deprecation warning
    panel.spacing = unit(0, "pt")
  )

# Add genome-quality ring: bar length encodes completeness, bar colour encodes
# contamination (light green = low, light red = high).
# new_scale_fill() flushes the fill scale used by the previous ring so that
# this ring can use its own colour scheme; a single call is sufficient.
circular_tree <- circular_tree +
  new_scale_fill() +
  scale_fill_gradient(low = "#d1f4ba", high = "#f4baba") +
  geom_fruit(
    data = genome_metadata,
    geom = geom_bar,
    mapping = aes(x = completeness, y = genome, fill = contamination),
    offset = 0.55,
    orientation = "y",
    stat = "identity"
  )

# Add genome-size ring: bar length encodes genome length.
# The bars use one fixed colour, so it is set as a constant `fill` parameter
# forwarded to geom_bar. A manual fill scale has no effect here because no
# `fill` aesthetic is mapped, which left the bars in the default bar colour.
circular_tree <- circular_tree +
  new_scale_fill() + # keep the fill scale free for any ring added afterwards
  geom_fruit(
    data = genome_metadata,
    geom = geom_bar,
    mapping = aes(x = length, y = genome),
    offset = 0.05,
    orientation = "y",
    stat = "identity",
    fill = "#cccccc"
  )

# Add ring labels
# All three labels sit at y = 0, one per x position; the leading spaces in
# each label push the text sideways from that anchor point
circular_tree <- circular_tree +
  annotate(
    "text",
    x = c(2.7, 3.1, 3.5),
    y = 0,
    label = c(
      "            Phylum",
      "                         Genome quality",
      "                     Genome size"
    ),
    family = "arial",
    size = 3.5
  )

# Plot circular tree: open the fan by 30 degrees, then rotate it by 90 degrees
rotate_tree(open_tree(circular_tree, 30), 90)

## Taxonomy overview

4.1.1 Domains

# Number of MAGs per domain, most abundant first
genome_metadata %>%
  count(domain, name = "mag_n", sort = TRUE) %>%
  tt()
tinytable_xr9nl919jfd36g1qeqte
domain mag_n
d__Bacteria 1026
d__Archaea 9

4.1.2 Archaea

# Number of archaeal MAGs per phylum, most abundant first
genome_metadata %>%
  filter(domain == "d__Archaea") %>%
  count(phylum, name = "mag_n", sort = TRUE) %>%
  tt()
tinytable_m3tsy20yj49g7g2ffsd9
phylum mag_n
p__Halobacteriota 4
p__Thermoproteota 2
p__Iainarchaeota 1
p__Methanobacteriota 1
p__Nanoarchaeota 1